# Direct outputs not eligible for public release

# E.g., to run (replace <this_script>.R with the path to this file):
# nohup R CMD BATCH --no-save --no-restore '--args outcomes=c("per_adult_total_empl_income")' <this_script>.R & #nolint

# Every trailing command-line argument is parsed and evaluated as an R
# expression; this is how the batch invocation shown above
# (`--args outcomes=c(...)`) defines `outcomes` for the rest of the script.
# NOTE(review): this evaluates whatever text is passed after --args, so the
# script should only be launched with trusted arguments.
args <- commandArgs(trailingOnly = TRUE)
for (cli_expr in args) {
    eval(parse(text = cli_expr))
}


# GLOBAL SETTINGS --------------------------------------------------------------

# Session-wide options: print warnings as they occur, report error locations,
# lift the print limit, and show numbers with 16 significant digits while
# strongly penalising scientific notation.
options(
    warn = 1,
    show.error.locations = TRUE,
    max.print = .Machine$integer.max,
    digits = 16,
    scipen = 999
)

# Main seed for the script. The generator kind is switched before seeding so
# that the seed applies to the L'Ecuyer-CMRG generator.
seed <- 818675309L
RNGkind(kind = "L'Ecuyer-CMRG")
set.seed(seed)

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(Matrix)
library(zoo)
library(multiwayvcov)
library(lmtest)
library(checkmate)
library(futile.logger)
library(lfe)

library(remotes)
remotes::install_github("setzler/eventStudy/eventStudy")
library(eventStudy) # for DiD-IV

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
# Printing a data.table shows each column's type under its name, plus any keys.
options(datatable.print.keys = TRUE, datatable.print.class = TRUE)
# Restrict data.table to a single thread.
# NOTE(review): presumably because parallelism comes from the forked workers
# at the end of the script -- confirm.
setDTthreads(threads = 1L)

# BEGIN FILE -------------------------------------------------------------------

# Load the event-study helpers. `local = TRUE` evaluates the sourced file in
# the environment this call is made from.
# NOTE(review): `Wald_ES2()` used below is not defined in this file, so it
# presumably comes from this script -- confirm.
base::source(file = "~/code/0-utility-functions/wald_es.R", local = TRUE)

# Estimate effects for one outcome separately for younger and older winners.
#
# Reads the lottery panel, zero-fills missing values of the outcome (integer or
# numeric outcomes only) and of `per_adult_adjgross`, flags two age groups
# (30-46 and 47-64, bounds included), runs `Wald_ES2()` once per age group,
# keeps the collapsed cohort-weighted "att" / "ratio" estimates, and saves the
# stacked result to
# ~/estimation-output/unearned_income_effect_estimates_<outcome>_by_age.rds.
#
# Args:
#   outcome: length-one character; name of the outcome column in the panel.
#
# Returns:
#   `outcome`, unchanged. The estimates themselves are written to disk.
#
# Raises:
#   An error if `outcome` is not a column of the lottery panel.
MakeUnearnedIncomeAgeEffects <- function(outcome) {

    # Read in lottery panel data
    lottery_panel <- readRDS("~/population-panel-data/lottery_panel_data.rds")

    # Fail early, before any estimation is attempted, if the outcome is absent
    if (!(outcome %in% names(lottery_panel))) {
        stop(
            sprintf(
                "Outcome '%s' is not a column of the lottery panel",
                outcome
            ),
            call. = FALSE
        )
    }

    # For all outcomes, replace with zero if missing. The class test uses %in%
    # so that a column carrying several classes (e.g. a date class) gives a
    # single FALSE instead of a length > 1 condition, which `||` rejects in
    # recent R versions.
    outcome_class <- class(lottery_panel[[outcome]])
    if (any(outcome_class %in% c("integer", "numeric"))) {
        lottery_panel[is.na(get(outcome)), (outcome) := 0]
    }

    # Similarly, for AGI (used to construct quartiles)
    lottery_panel[is.na(per_adult_adjgross), per_adult_adjgross := 0]

    # Consider two roughly-split age groups (e.g. younger and older winners).
    # `between()` already returns a logical vector (NA where age is NA).
    lottery_panel[, `:=`(
        age_case1 = between(age, 30, 46, incbounds = TRUE),
        age_case2 = between(age, 47, 64, incbounds = TRUE)
    )]

    # Produce deliberately aggregated estimates: label (a) and the event times
    # pooled under that label (b)
    collapse_table <-
        data.table(
            a = c("one_to_two", "three_to_five", "post_avg"),
            b = list(1:2, 3:5, 1:5)
        )

    # Run the stacked event-study regression for one age group. The same
    # indicator column selects both the treated and the control subset; every
    # other setting is shared between the two age groups.
    RunAgeGroupEventStudy <- function(age_flag_var) {
        Wald_ES2(
            long_data = copy(lottery_panel),
            outcomevar = outcome,
            unit_var = "tin",
            cal_time_var = "tax_yr",
            onset_time_var = "win_yr",
            cluster_vars = "tin",
            omitted_event_time = -2,
            discrete_covars = "age",
            control_subset_var = age_flag_var,
            control_subset_event_time = 0,
            treated_subset_var = age_flag_var,
            treated_subset_event_time = 0,
            heterogeneous_only = TRUE,
            anticipation = 0,
            endog_var = "L_ann_multiperiod",
            calculate_collapse_estimates = TRUE,
            collapse_inputs = collapse_table
        )
    }

    # The event-study results contain more than is needed for this step, so
    # focus them with a quick subsetting function
    MakeMainAgeEstimates <- function(did_dt) {

        # Collapsed estimates only, restricted to the groupings defined above
        doutcome_dl_dt_collapsed <-
            setDT(did_dt[[1]])[
                rn == "att" &
                    model == "ratio" &
                    ref_onset_time == "Cohort-Weighted + Collapsed" &
                    grouping %in% collapse_table$a,
                .(ref_event_time, estimate, cluster_se, grouping)
            ]

        # Will use a 97, 98, 99 convention to get
        # the right order of results in tables
        doutcome_dl_dt_collapsed[
            grouping == "one_to_two",
            ref_event_time := 97
        ]
        doutcome_dl_dt_collapsed[
            grouping == "three_to_five",
            ref_event_time := 98
        ]
        doutcome_dl_dt_collapsed[grouping == "post_avg", ref_event_time := 99]
        doutcome_dl_dt_collapsed[, grouping := NULL]

        return(doutcome_dl_dt_collapsed)
    }

    # Younger group first, then older, so the estimation order is unchanged
    did_results_young <-
        MakeMainAgeEstimates(did_dt = RunAgeGroupEventStudy("age_case1"))
    did_results_old <-
        MakeMainAgeEstimates(did_dt = RunAgeGroupEventStudy("age_case2"))

    # Finally, combine together
    did_results_young[, age_group := "Younger Winners"]
    did_results_old[, age_group := "Older Winners"]

    did_results_combined <-
        rbindlist(
            list(
                did_results_young,
                did_results_old
            ),
            use.names = TRUE
        )

    saveRDS(
        did_results_combined,
        sprintf("~/estimation-output/unearned_income_effect_estimates_%s_by_age.rds", outcome)
    )

    return(outcome)
}

# Run the estimation for every requested outcome, one forked worker each.
# `outcomes` is not defined in this file; it is expected to be created by the
# command-line arguments evaluated at the top of the script.
if (!exists("outcomes") || length(outcomes) == 0L) {
    stop(
        "No outcomes supplied; pass e.g. ",
        "'--args outcomes=c(\"per_adult_total_empl_income\")'",
        call. = FALSE
    )
}

# One core per outcome, as before. The call is namespace-qualified so it does
# not depend on `parallel` having been attached elsewhere.
num_cores <- length(outcomes)
worker_results <- parallel::mcmapply(
    MakeUnearnedIncomeAgeEffects,
    outcomes,
    SIMPLIFY = FALSE,
    mc.silent = FALSE,
    mc.cores = num_cores,
    mc.set.seed = TRUE
)

# Keep the per-outcome results in the batch output
print(worker_results)

# A worker that errors returns a "try-error" object instead of the outcome
# name, and a missing result shows up as NULL. Stop rather than let the batch
# job finish as if every outcome had been estimated.
worker_failed <- vapply(
    worker_results,
    function(res) is.null(res) || inherits(res, "try-error"),
    logical(1)
)
if (any(worker_failed)) {
    stop(
        "Estimation failed for outcome(s): ",
        paste(as.character(outcomes)[worker_failed], collapse = ", "),
        call. = FALSE
    )
}
